knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
library(janitor)
library(tidyverse)
library(ggplot2)
library(readxl)
library(openxlsx)
library(data.table)
library(pointblank)
library(haven)
library(naniar)
library(pointblank)
library(heatmaply)
# Create function to :1.) Join two 5-year period data, and 2.) Add the n years of availability
join_transform <- function(table_1, table_2){
table_all <- full_join(table_1, table_2, by = c("countries_areas", "data_available"))
table_clean <-
table_all %>%
mutate_at(vars(n.x, n.y), ~replace(., is.na(.), 0)) %>%
mutate (n = n.x + n.y) %>%
select(-c(n.x, n.y))
return(table_clean)
}
# Create Function to Transform Table 4: Number of Years Available for
#Live Births, Infant Deaths, and Deaths (General) by Countries
t4_transform <- function(table, years, table_id){
# Prep original
table_clean <-
table[- 1, ] %>%
select (c("...1", contains("number"))) %>%
rename_all(~c("countries_areas",
"live_births",
"deaths",
"infant_deaths")) %>%
mutate(year=countries_areas) %>%
replace_with_na(replace = list(countries_areas = years)) %>%
separate(countries_areas, c("countries_areas"), " - ") %>%
fill(countries_areas) %>%
replace_with_na_all(condition = ~.x == "...") %>%
replace_with_na_all(condition = ~.x == "-")
# Create count dataframe
table_count <-
table_clean %>%
gather(key="data_available", value="value", -c(countries_areas, year)) %>%
mutate (countries_areas = gsub('[0-9]+', '', countries_areas)) %>%
filter(value != is.na(.)) %>%
group_by(countries_areas, data_available) %>%
summarise(n=n())
return(table_count)
}
# Load Table 4s
table4_DYB_all_2 <- read_excel("table04.xlsx", skip = 4) #2014-2018
table4_DYB_all_1 <- read_excel("Table04 (1).xlsx", skip = 4) #2009-2013
# Transform Table 4s
table4_1 <- t4_transform(table4_DYB_all_1,
c("2009", "2010", "2011", "2012", "2013"))
table4_2 <- t4_transform(table4_DYB_all_2,
c("2014", "2015", "2016", "2017", "2018"))
# Merge Table 4s :2009-2018
table4_clean <- join_transform(table4_1, table4_2)
# Create Function to Transform Table 22-25: Number of Years Available for
# Marriage and Divorces by Countries
ur_transform <- function(data_all, data_type) {
data <-
data_all [-1,] %>%
select (c("...1", contains("20"))) %>%
select (1:6) %>%
rename_at(.vars = 1, ~c("countries_areas")) %>%
separate(countries_areas, c("countries_areas"), " - ") %>%
filter (!(countries_areas %in% c("Urban", "Rural"))) %>%
replace_with_na(replace = list(countries_areas = "Total")) %>%
fill(countries_areas) %>%
replace_with_na_all(condition = ~.x == "...") %>%
mutate (countries_areas = gsub('[0-9]+', '', countries_areas))
data_final <-
data %>%
gather(key="year", value=value, -c(countries_areas)) %>%
filter(value != is.na(.)) %>%
mutate(data_available = data_type) %>%
group_by(countries_areas, data_available) %>%
summarise(n=n())
return(data_final)
}
#Load Tables 22-25
table22_DYB <- read_excel("table22.xlsx", skip = 4) #2014-2018
table23_DYB <- read_excel("table23_2013.xlsx", skip = 4) #2009-2013
table24_DYB <- read_excel("table24.xlsx", skip = 4) #2014-2018
table25_DYB <- read_excel("Table25.xlsx", skip = 4) #2009-2013
#Transform Tables 22-25
table_m2 <- ur_transform(table22_DYB, "marriages")
table_m1 <- ur_transform(table23_DYB, "marriages")
table_d2 <- ur_transform(table24_DYB, "divorces")
table_d1 <- ur_transform(table25_DYB, "divorces")
table_mall <- join_transform(table_m1, table_m2)
table_dall <- join_transform(table_d1, table_d2)
# Load Table 12s
table12_DYB_all_2 <- read_excel("./Gen Death/table12_2.xlsx", skip = 4) #2014-2018
table12_DYB_all_1 <- read_excel("./Gen Death/table12_1.xlsx", skip = 4) #2009-2013
# Transform Table 4s
table12_1 <- ur_transform(table12_DYB_all_1, "foetal_deaths")
table12_2 <- ur_transform(table12_DYB_all_2, "foetal_deaths")
# Merge Table 4s :2009-2018
table12_clean <- join_transform(table12_1, table12_2)
# Load UNPA Countries + Regions Lookup
lookup_table_all <-
as_tibble(read_excel("UNFPA_countries.xlsx"))
all_tables_dyb <- rbind(table_mall, table_dall, table4_clean, table12_clean)
data_available <- c("live_births", "infant_deaths", "marriages", "divorces", "deaths", "foetal_deaths")
lookup_score<- function(avail, data) {
# Compute completeness Score by Category
complete_score <-
data %>%
group_by(countries_areas) %>%
summarise(complete = n())
# Compute completeness Score by Overall Sum by Country
count_score <-
aggregate(data$n, by = list(data$countries_areas), sum)
count_score <-
rename(count_score, "countries_areas" = Group.1)
# Join lookup and indicators
available_count_lookup <- left_join(merge(avail, lookup_table_all),
complete_score, by="countries_areas")
available_count_lookup <-
rename(available_count_lookup, "data_available" = x)
available_count_lookup <- left_join(available_count_lookup, count_score, by = "countries_areas")
}
data_available_lookup <- lookup_score(data_available, all_tables_dyb)
## `summarise()` ungrouping output (override with `.groups` argument)
#Create a UNFPA Life Course Approach Complete Data
UNFPA_lifeapp <-
left_join(data_available_lookup, all_tables_dyb,
by = c("countries_areas", "data_available"), ignore_case = T)
UNFPA_lifeapp <-
UNFPA_lifeapp %>%
mutate_all(~replace(., is.na(.), 0)) %>%
# Create score :0.75 on completeness across categories + 0.25 completeness in years
mutate(score = ((complete/5)*0.85) # scored by completeness across categories
+ ((x/50)*0.15)) %>% # scored by completeness across years
mutate (countries_areas = reorder(countries_areas, score))
head(data_available_lookup, 5)
## data_available countries_areas UNFPA_Regions complete x
## 1 live_births Angola ESA 3 3
## 2 infant_deaths Angola ESA 3 3
## 3 marriages Angola ESA 3 3
## 4 divorces Angola ESA 3 3
## 5 deaths Angola ESA 3 3
# Set parameters for levels and labels
levels_pref <- c("foetal_deaths", "live_births", "infant_deaths", "marriages", "divorces", "deaths")
labels_pref <- c("Foetal Deaths", "Live Births", "Infant Deaths", "Marriages", "Divorces", "Deaths")
# Create function to produce Heatmaps
num_years_plot <- function(region)
ggplot(subset(UNFPA_lifeapp, UNFPA_Regions %in% region),
aes(x = factor(data_available,
levels = levels_pref), y = countries_areas, fill = n)) +
geom_tile() +
scale_fill_distiller(name = "Number of\nYear(s) Available", palette = "Blues",
direction = +1, breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
limits=c(0,10)) +
theme_minimal() +
labs(title = paste("Available Data on Registered Vital Events in",
ifelse(region == "AP", "Asia and the Pacific",
ifelse(region == "AS", "Arab States",
ifelse(region == "EECA", "Eastern Europe and Central Asia",
ifelse(region == "ESA", "Eastern and Southern Africa",
ifelse(region == "LAC", "Latin America and Caribbean",
ifelse(region == "WCA", "Western and Central Africa")))))), "Region",
"\nby Country and Vital Event(s), 2009-2018",
sep = " "),
x = "Registered Vital Events Data", y = "Countries/Areas") +
scale_x_discrete(labels = labels_pref)
# Run heatmaps
num_years_plot("AS")
num_years_plot("AP")
num_years_plot("EECA")
num_years_plot("ESA")
num_years_plot("LAC")
num_years_plot("WCA")
This visualization will be remodelled with the following prompts: - Reordered by countries (most complete to least, top-bottom) - Facets removed
# Create table 4 transformation function that includes years and recodes values to binary
# (1 = "Available", 0 = "Unavailable)
t4_year_transform <- function(t4_table, years)
t4_table[- 1, ] %>%
select (c("...1", contains("number"))) %>%
rename_all(~c("countries_areas", "live_births", "deaths", "infant_deaths")) %>%
mutate(year=countries_areas) %>%
replace_with_na(replace = list(countries_areas = years)) %>%
separate(countries_areas, c("countries_areas"), " - ") %>%
fill(countries_areas) %>%
replace_with_na_all(condition = ~.x == "...") %>%
replace_with_na_all(condition = ~.x == "-") %>%
filter(!(live_births %in% NA & deaths %in% NA & infant_deaths %in% NA)) %>%
mutate_at(vars(live_births, deaths, infant_deaths), ~replace(., is.na(.), 0)) %>%
mutate(live_births = ifelse(live_births > 0, 1, 0)) %>%
mutate(deaths = ifelse(deaths > 0, 1, 0)) %>%
mutate(infant_deaths = ifelse(infant_deaths > 0, 1, 0)) %>%
gather(-c(countries_areas, year), key = "data_available", value = "availability") %>%
mutate (countries_areas = gsub('[0-9]+', '', countries_areas))
# Transform table 4s
table4_1_year <-
t4_year_transform(table4_DYB_all_1,
c("2009", "2010", "2011", "2012", "2013"))
table4_2_year <-
t4_year_transform(table4_DYB_all_2,
c("2014", "2015", "2016", "2017", "2018"))
# Create table 22-25 transformation function that includes years and recodes values to binary
# (1 = "Available", 0 = "Unavailable)
ur_year_transform <- function(data_ur, names, data_type){
data_ur[- 1, ] %>%
select (c("...1", contains("20"))) %>%
select (1:6) %>%
rename_all(~names) %>%
separate(countries_areas, c("countries_areas"), " - ") %>%
filter (!(countries_areas %in% c("Urban", "Rural"))) %>%
replace_with_na(replace = list(countries_areas = "Total")) %>%
fill(countries_areas) %>%
replace_with_na_all(condition = ~.x == "...") %>%
replace_with_na_all(condition = ~.x == "-") %>%
mutate (countries_areas = gsub('[0-9]+', '', countries_areas)) %>%
gather(-c(countries_areas), key = "year", value = "availability") %>%
mutate_at(vars(availability), ~replace(., is.na(.), 0)) %>%
mutate(availability = ifelse(availability > 0, 1, 0)) %>%
mutate(data_available = data_type) %>%
select(countries_areas, year, data_available,availability)
}
# Transform 22-25, Marriages and Divorces
table_m2_year <-
ur_year_transform(table22_DYB,
c("countries_areas", "2014", "2015", "2016", "2017", "2018"), "marriages")
table_m1_year <-
ur_year_transform(table23_DYB,
c("countries_areas", "2009", "2010", "2011", "2012", "2013"), "marriages")
table_d2_year <-
ur_year_transform(table24_DYB,
c("countries_areas", "2014", "2015", "2016", "2017", "2018"), "divorces")
table_d1_year <-
ur_year_transform(table25_DYB,
c("countries_areas", "2009", "2010", "2011", "2012", "2013"), "divorces")
# Transform 12, Foetal Deaths
table_f2_year <-
ur_year_transform(table12_DYB_all_2,
c("countries_areas", "2014", "2015", "2016", "2017", "2018"), "foetal_deaths")
table_f1_year <-
ur_year_transform(table12_DYB_all_1,
c("countries_areas", "2009", "2010", "2011", "2012", "2013"), "foetal_deaths")
# Bind all Data
all_tables_year <- rbind(table4_1_year, table4_2_year, table_d1_year,
table_d2_year, table_m1_year, table_m2_year,
table_f2_year, table_f1_year)
# Create Lookup :Country, Region, Available Data, Year
data_year_lookup <-
data_available_lookup %>% # Reuse previous lookup that already includes Available Data
select(data_available:UNFPA_Regions) %>%
merge(., c("2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018")) %>%
# Merge available numbers of years possible
rename("year" = y)
# Merge lookup
table_year_unfpa <-
data_year_lookup %>%
left_join(., all_tables_year, by=c("countries_areas", "data_available", "year")) %>%
mutate_at(vars(availability), ~replace(., is.na(.), 0)) %>%
mutate(UNFPA_Regions = reorder(UNFPA_Regions, desc(availability)))
all_plot_sum <- function(data_type, data_type_name) {
ggplot(subset(table_year_unfpa, data_available %in% data_type), aes(x = year, y = countries_areas, fill= availability, color = "grey")) +
geom_tile() +
facet_grid(UNFPA_Regions~., space = "free_y", scale = "free_y") +
scale_fill_distiller(palette = "Purples", direction = +1, breaks = c(0, 1), limits=c(0,1)) +
theme(axis.text.x = element_text(size =0.5)) +
theme_classic()+
theme(legend.position = "none")+
labs(title = paste("Available Data of Registered",
data_type_name, "in All UNFPA Regions"),
subtitle= "by Country and Year(s), 2009-2018",
x = "Year", y = "Country/Area")
}
all_plot_sum("live_births", "Live Births")
all_plot_sum("infant_deaths", "Infant Deaths")
all_plot_sum("marriages", "Marriages")
all_plot_sum("divorces", "Divorces")
all_plot_sum("deaths", "Deaths")
all_plot_sum("foetal_deaths", "Foetal Deaths")
# Create Function to Generate Detailed (Green) Heatmaps
det_heatmap <- function(data, region, title, labels, levels) {
ggplot(subset(data, UNFPA_Regions %in% region), aes(x = factor(data_available, levels = levels), y = countries_areas, fill= n)) +
geom_tile() +
scale_fill_distiller(name = "Number of \n Year(s) Available", palette = "Greens",
direction = +1, breaks = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
limits=c(0,10)) +
theme_minimal() +
labs(x = paste(title, "Data", sep = " "), y = "Countries/Areas") +
labs(title = paste("Available Data on Registered",
title,
"in the",
ifelse(region == "AP", "Asia and the Pacific",
ifelse(region == "AS", "Arab States",
ifelse(region == "EECA", "Eastern Europe and Central Asia",
ifelse(region == "ESA", "Eastern and Southern Africa",
ifelse(region == "LAC", "Latin America and Caribbean",
ifelse(region == "WCA", "Western and Central Africa")))))),
"Region",
"\nby Country and Disaggregation Variable(s), 2009-2018",
sep = " "),
x = paste(title, "Data"), y = "Countries/Areas") +
theme(axis.text.x=element_text(angle=45,hjust=1)) +
scale_x_discrete(labels = labels)
}
# Table Live Births
all_bt <- list.files(path = "./Birth Regs",
pattern = "table",
full.names = TRUE)
all_bts_excel <-
lapply(all_bt, function (i) {
x = read_excel(i, skip = 4)
})
# Create Freq Table Function for non-Urban/Rural DYB Tables
ap_transform <- function(data, data_type) {
data[- 1, ] %>%
select (1:2) %>%
rename_all(~c("countries_areas", "total")) %>%
mutate(countries_areas = gsub('[0-9]+', '', countries_areas)) %>%
filter (!(countries_areas %in% c(" - ", " +", " (C)", " (+U)", " (|)", " (U)",
" (+C)", "* (C)", "* (+C)", "* (+U)",
"Unknown - Inconnu", "* (U)"))) %>%
replace_with_na(replace = list(countries_areas = "Total")) %>%
fill(countries_areas) %>%
filter(!(total == is.na(.))) %>%
separate(countries_areas, c("countries_areas"), " - ") %>%
group_by(countries_areas) %>%
summarise(n=n()) %>%
mutate(data_available = data_type)
}
# Transform all live births data
# Will abbreviate this to lapply(data[1:2], ur_transform) and lapply(data[3:6], ap_transform)
bt_ur_clean1 <- ur_transform(all_bts_excel[[1]], "births_urbanrural")
bt_ur_clean2 <- ur_transform(all_bts_excel[[2]], "births_urbanrural")
bt_am_clean1 <- ap_transform(all_bts_excel[[3]], "births_age_mother")
bt_am_clean2 <- ap_transform(all_bts_excel[[4]], "births_age_mother")
bt_af_clean1 <- ap_transform(all_bts_excel[[5]], "births_age_father")
bt_af_clean2 <- ap_transform(all_bts_excel[[6]], "births_age_father")
bt_ur_all <- join_transform(bt_ur_clean1, bt_ur_clean2)
bt_am_all <- join_transform(bt_am_clean1, bt_am_clean2)
bt_af_all <- join_transform(bt_af_clean1, bt_af_clean2)
# Create Freq Table Function for Supplementary Tables
st_clean <- function(data) {
data %>%
clean_names() %>%
group_by(country_or_area, year) %>%
summarise(n=n()) %>%
select(country_or_area, year) %>%
rename("countries_areas" = country_or_area) %>%
group_by(countries_areas) %>%
summarise(n=n())
}
# Load Supplementary Tables
all_lb <- list.files(path = "./Birth Regs", # insert path to folder
pattern = "LB", # do not change this
full.names = TRUE)
all_lbs_csv <-
lapply(all_lb, function (i) {
x = read_csv(i)
})
# Create Frequency Tables for All STs
all_births_long <-
lapply(all_lbs_csv, function (i) {
x = st_clean(i)
})
all_births_long <-
Map(cbind, all_births_long, data_available =
list("birth_order", "birth_bw_sex","birth_gestation",
"birth_month", "birth_plural", "birth_ord_sex",
"birth_mar_len"))
all_births_long <- rbindlist(all_births_long)
all_births_long <- select(all_births_long, c(countries_areas, data_available, n))
all_births_tables <- bind_rows(all_births_long, bt_ur_all, bt_af_all, bt_am_all)
# Create Live Births Lookup
types_birth_long <- c("birth_order", "birth_bw_sex","birth_gestation", "birth_month",
"birth_plural", "birth_ord_sex", "birth_mar_len", "births_urbanrural",
"births_age_mother", "births_age_father")
lookup_birth_unfpa <- lookup_score(types_birth_long, all_births_tables)
# Create UNFPA-specific disaggregated Live Births data
all_births_unfpa <- left_join(lookup_birth_unfpa, all_births_tables, by = c("countries_areas", "data_available"))
births_unfpa_clean <-
all_births_unfpa %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(score = ((complete/5)*0.85)+((x/50)*0.15)) %>%
mutate (countries_areas = reorder(countries_areas, score))
# Generate Live Birth Heatmaps
birth_labels <- c("Urban & Rural", "Age of Mother", "Age of Father", "Order & Age of Mother", "Order & Sex", "Birth Month", "Weight & Sex", "Gestational Age", "Marriage Duration\nof Parents", "Birth Plurality")
birth_prefs <- c("births_urbanrural", "births_age_mother", "births_age_father", "birth_order", "birth_ord_sex", "birth_month", "birth_bw_sex", "birth_gestation", "birth_mar_len", "birth_plural")
det_heatmap(births_unfpa_clean, "AP", "Live Births", birth_labels, birth_prefs)
det_heatmap(births_unfpa_clean, "AS", "Live Births", birth_labels, birth_prefs)
det_heatmap(births_unfpa_clean, "EECA", "Live Births", birth_labels, birth_prefs)
det_heatmap(births_unfpa_clean, "ESA", "Live Births", birth_labels, birth_prefs)
det_heatmap(births_unfpa_clean, "LAC", "Live Births", birth_labels, birth_prefs)
det_heatmap(births_unfpa_clean, "WCA", "Live Births",birth_labels, birth_prefs)
# Load Marriages and Divorces Tables
all_mdt <- list.files(path = "./MarriageDiv",
pattern = "table",
full.names = TRUE)
all_mdt_excel <-
lapply(all_mdt, function (i) {
x = read_excel(i, skip = 4)
})
md_am_clean1 <- ap_transform(all_mdt_excel[[1]], "marriage_age")
md_am_clean2 <- ap_transform(all_mdt_excel[[2]], "marriage_age")
md_am_all <- join_transform(md_am_clean1, md_am_clean2)
# Load Supplementary Tables :Marriages and Divorces
all_mds <- list.files(path = "./MarriageDiv",
pattern = "MD",
full.names = TRUE)
all_mds_csv <-
lapply(all_mds, function (i) {
x = read_csv(i)
})
# Transform all Marriages and Divorces STs into Frequencies
all_mardivs_long <-
lapply(all_mds_csv, function (i) {
x = st_clean(i)
})
all_mardivs_long <-
Map(cbind, all_mardivs_long, data_available =
list("marriage_cross", "divorce_ur", "marriage_1st_age", "marriage_ur"))
all_mardivs_long <- rbindlist(all_mardivs_long)
all_mardivs_long <- select(all_mardivs_long, c(countries_areas, data_available, n))
all_mardivs_tables <- bind_rows(all_mardivs_long, md_am_clean1, md_am_clean2)
# Create Marriage and Divorces Lookup
lookup_mardiv_unfpa <- lookup_score(c("marriage_cross", "divorce_ur", "marriage_1st_age", "marriage_ur", "marriage_age"), all_mardivs_tables)
# Generate all UNFPA-specific marriage and divorce availability data
all_mardiv_unfpa <- left_join(lookup_mardiv_unfpa, all_mardivs_tables, by = c("countries_areas", "data_available"))
mardiv_unfpa_clean <-
all_mardiv_unfpa %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(score = ((complete/5)*0.85)+((x/50)*0.15)) %>%
mutate (countries_areas = reorder(countries_areas, score))
# Generate Marriage/Divorces Heatmaps
mardiv_levels <- c("marriage_ur", "marriage_cross", "marriage_age", "marriage_1st_age", "divorce_ur")
mardiv_labels <- c("Urban/Rural\n(Marriages)", "Prior Marital Status\n(Marriages)", "Age\n(Marriages)", "Age at First Marriage\n(Marriages)", "Urban/Rural\n(Divorces)")
det_heatmap(mardiv_unfpa_clean, "AP", "Marriages and Divorces", mardiv_labels, mardiv_levels)
det_heatmap(mardiv_unfpa_clean, "AS", "Marriages and Divorces", mardiv_labels, mardiv_levels)
det_heatmap(mardiv_unfpa_clean, "EECA", "Marriages and Divorces", mardiv_labels, mardiv_levels)
det_heatmap(mardiv_unfpa_clean, "ESA", "Marriages and Divorces", mardiv_labels, mardiv_levels)
det_heatmap(mardiv_unfpa_clean, "LAC", "Marriages and Divorces",mardiv_labels, mardiv_levels)
det_heatmap(mardiv_unfpa_clean, "WCA", "Marriages and Divorces", mardiv_labels, mardiv_levels)
# Supplementary Tables :General and Foetal Deaths
all_gds <- list.files(path = "./Gen Death",
pattern = "GD",
full.names = TRUE)
all_gds_csv <-
lapply(all_gds, function (i) {
x = read_csv(i)
})
all_fds <- list.files(path = "./Gen Death",
pattern = "FD",
full.names = TRUE)
all_fds_csv <-
lapply(all_fds, function (i) {
x = read_csv(i)
})
id_agesex<-read_excel("./Gen Death/IFagesex.xlsx")
id_urbanrural<-read_csv("./Gen Death/IFurbanrural.csv")
all_gds_long <-
lapply(all_gds_csv, function (i) {
x = st_clean(i)
})
all_gds_long <-
Map(cbind, all_gds_long, data_available =
list("gm_cause", "gm_age_sex_ur", "gm_month", "gm_sex_ur"))
all_fds_long <-
lapply(all_fds_csv, function (i) {
x = st_clean(i)
})
all_fds_long <-
Map(cbind, all_fds_long, data_available =
list("ab_urbanrural", "fd_agewoman", "fd_gest_age", "fd_sex_ur"))
id_agesex_long<- ap_transform(id_agesex, "inf_death_age_sex")
id_urbanrural_long <-
id_urbanrural %>%
st_clean() %>%
cbind(data_available = "inf_death_urbanrural")
all_gfd_tables <- bind_rows(rbindlist(all_gds_long), rbindlist(all_fds_long), all_fds_long, id_agesex_long, id_urbanrural_long)
# I just identified a problem in the wrangling of Infant Death tabs by age and sex. I will come back to this.
id_agesex_long<-
id_agesex[- 1, ] %>%
select (1:2) %>%
rename_all(~c("countries_areas", "total")) %>%
mutate(countries_areas = gsub('[0-9]+', '', countries_areas)) %>%
separate(countries_areas, c("countries_areas"), " - ") %>%
filter (!(countries_areas %in% c(" - ", "+", " (C)", "(+U)", "(|)", " (U)",
" (+C)", "*(C)", "*(+C)", "*(+U)",
"Unknown - Inconnu", "* (U)", "days", "months", "Less than day")))
lookup_gfd_unfpa <- lookup_score(c("inf_death_age_sex", "inf_death_urbanrural", "gm_cause", "gm_age_sex_ur", "gm_month", "gm_sex_ur", "ab_urbanrural", "fd_agewoman", "fd_gest_age", "fd_sex_ur"), all_gfd_tables)
all_gfd_unfpa <- left_join(lookup_gfd_unfpa, all_gfd_tables, by = c("countries_areas", "data_available"))
gfd_unfpa_clean <-
all_gfd_unfpa %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(score = ((complete/5)*0.85)+((x/50)*0.15)) %>%
mutate (countries_areas = reorder(countries_areas, score))
GF_levels <- c("fd_sex_ur", "fd_agewoman", "fd_gest_age", "inf_death_urbanrural", "inf_death_age_sex","ab_urbanrural", "gm_sex_ur", "gm_age_sex_ur", "gm_cause", "gm_month")
GF_labels <- c("Urban/Rural\n(Foetal Deaths)", "Age of Mother\n(Foetal Deaths)", "Gestational Age\n(Foetal Deaths)", "Urban/Rural\n(Infant Deaths)", "Age & Sex\n(Infant Deaths)", "Urban/rural\n(Abortions)", "Sex & Urban/Rural\n(General Deaths)", "Age, Sex, & Urban/Rural\n(General Deaths)", "Cause\n(General Deaths)", "Month\n(General Deaths)")
det_heatmap(gfd_unfpa_clean, "AP", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels, GF_levels)
det_heatmap(gfd_unfpa_clean, "AS", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels, GF_levels)
det_heatmap(gfd_unfpa_clean, "EECA", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels, GF_levels)
det_heatmap(gfd_unfpa_clean, "ESA", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels, GF_levels)
det_heatmap(gfd_unfpa_clean, "LAC", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels, GF_levels)
det_heatmap(gfd_unfpa_clean, "WCA", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels, GF_levels) # Will fix this title wrapping
Live Births -Table 10 - Live births by age of mother and sex of child, general and age-specific fertility rates: latest available year, 2009-2018 -Table 11 - Live births and live birth rates by age of father: latest available year, 2009-2018
Marriages -Table 23 - Marriages by age of groom and by age of bride: latest available year, 2009-2018
Divorces -Table 25 - Divorces and percentage distribution by duration of marriage, latest available year: 2009-2018
Foetal Deaths and Abortions -Table 14 - Legally induced abortions by age and number of previous live births of women: latest available year, 2009-2018
Infant Deaths -Table 16 - Infant deaths and infant mortality rates by age and sex, latest available year: 2009-2018
General Deaths -Table 19 - Deaths by age and sex, age-specific death rates by sex: latest available year, 2009-2018
# Test steps with Table 19 - General Death, Age and Sex
all_gds_age_sex <- list.files(path = "./Gen Death", # Load all General Deaths by Age and Sex, Latest Available Year
pattern = "table19",
full.names = TRUE)
all_gds_age_sex_excel <- # Convert into Excel
lapply(all_gds_age_sex, function (i) {
x = read_excel(i)
})
# Create Freq Table Function for non-Urban/Rural DYB Tables, modified
la_transform_mod <- function(data_list, data_type) {
data_list_tfr<-
lapply(data_list, function (i) {
x = ap_transform(i, data_type)
})
data_1<-rbindlist( data_list_tfr)
data_1<-
data_1 %>%
group_by(countries_areas) %>%
summarise(n=n()) %>%
mutate("data_available" = data_type)
return(data_1)
}
#Create dataset of previous years
all_gds_age_sex_long <- la_transform_mod(all_gds_age_sex_excel, "gd_age_sex")
## Warning: Expected 1 pieces. Additional pieces discarded in 105 rows [5, 9, 13,
## 15, 19, 25, 32, 40, 42, 46, 52, 54, 58, 64, 66, 70, 76, 78, 80, 82, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Expected 1 pieces. Additional pieces discarded in 106 rows [5, 11, 13,
## 15, 19, 25, 36, 38, 42, 50, 52, 56, 62, 64, 68, 76, 78, 80, 82, 84, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Expected 1 pieces. Additional pieces discarded in 107 rows [6, 16, 18,
## 20, 22, 26, 32, 36, 44, 46, 50, 58, 60, 64, 68, 71, 79, 81, 83, 85, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Expected 1 pieces. Additional pieces discarded in 109 rows [3, 8, 18,
## 20, 22, 26, 28, 30, 32, 36, 40, 48, 50, 54, 61, 63, 67, 71, 74, 82, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Expected 1 pieces. Additional pieces discarded in 107 rows [3, 8, 12,
## 20, 22, 24, 28, 30, 32, 34, 40, 44, 53, 57, 61, 68, 72, 76, 79, 87, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Expected 1 pieces. Additional pieces discarded in 104 rows [3, 8, 12,
## 20, 22, 26, 28, 30, 36, 38, 49, 53, 60, 64, 66, 70, 71, 79, 81, 83, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Expected 1 pieces. Additional pieces discarded in 108 rows [3, 9, 13,
## 21, 25, 29, 31, 33, 39, 41, 46, 54, 55, 57, 64, 68, 70, 76, 79, 87, ...].
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
lookup_gfd_unfpa_mod <- lookup_score(c("inf_death_age_sex", "inf_death_urbanrural", "gm_cause", "gm_age_sex_ur", "gm_month", "gm_sex_ur", "gd_age_sex", "ab_urbanrural", "fd_agewoman", "fd_gest_age", "fd_sex_ur"), all_gfd_tables)
all_gfd_tables_mod <- bind_rows(rbindlist(all_gds_long), rbindlist(all_fds_long), all_fds_long, id_agesex_long, all_gds_age_sex_long, id_agesex_long, id_urbanrural_long)
all_gfd_unfpa_mod <- left_join(lookup_gfd_unfpa_mod, all_gfd_tables_mod, by = c("countries_areas", "data_available"))
gfd_unfpa_clean_mod <-
all_gfd_unfpa_mod %>%
mutate_all(~replace(., is.na(.), 0)) %>%
mutate(score = ((complete/5)*0.85)+((x/50)*0.15)) %>%
mutate (countries_areas = reorder(countries_areas, score))
GF_levels_mod <- c("fd_sex_ur", "fd_agewoman", "fd_gest_age", "inf_death_urbanrural", "inf_death_age_sex","ab_urbanrural", "gm_sex_ur", "gd_age_sex", "gm_age_sex_ur", "gm_cause", "gm_month")
GF_labels_mod <- c("Urban/Rural\n(Foetal Deaths)", "Age of Mother\n(Foetal Deaths)", "Gestational Age\n(Foetal Deaths)", "Urban/Rural\n(Infant Deaths)", "Age & Sex\n(Infant Deaths)", "Urban/rural\n(Abortions)", "Sex & Urban/Rural\n(General Deaths)", "Age, Sex, & Urban/Rural\n(General Deaths)", "Age & Sex\n(General Deaths)", "Cause\n(General Deaths)", "Month\n(General Deaths)")
det_heatmap(gfd_unfpa_clean_mod, "AP", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels_mod, GF_levels_mod)
det_heatmap(gfd_unfpa_clean_mod, "AS", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels_mod, GF_levels_mod)
det_heatmap(gfd_unfpa_clean_mod, "EECA", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels_mod, GF_levels_mod)
det_heatmap(gfd_unfpa_clean_mod, "ESA", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels_mod, GF_levels_mod)
det_heatmap(gfd_unfpa_clean_mod, "LAC", "Foetal Deaths, Abortions, and General Deaths\n", GF_labels_mod, GF_levels_mod)
det_heatmap(gfd_unfpa_clean_mod, "WCA", "Foetal Deaths, Abortions, and General Deaths\n",GF_labels_mod, GF_levels_mod)